<?xml version="1.0" encoding="utf-8"?>
<!-- Created by Leo: http://leoeditor.com/leo_toc.html -->
<leo_file xmlns:leo="http://leoeditor.com/namespaces/leo-python-editor/1.1" >
<leo_header file_format="2"/>
<globals/>
<preferences/>
<find_panel_settings/>
<vnodes>
<v t="caminhante.20200309141113.1"><vh>@settings</vh>
<v t="caminhante.20200309141113.2"><vh>NodeActions</vh>
<v t="caminhante.20200309141113.3"><vh>^@(file|clean) .*\.[ch] [X]</vh></v>
<v t="caminhante.20200309141113.4"><vh>@run*</vh></v>
</v>
</v>
<v t="caminhante.20200309142214.1"><vh>Utils</vh>
<v t="caminhante.20200309141700.1"><vh>@run fossil status</vh></v>
<v t="caminhante.20200309141709.1"><vh>@run sync Git repo</vh></v>
<v t="caminhante.20200309142127.1"><vh>@run create Git repo</vh></v>
</v>
<v t="caminhante.20200309141027.3"><vh>Minimal UTF-8 support</vh>
<v t="caminhante.20200309141415.1"><vh>@auto README.md</vh></v>
<v t="caminhante.20200309141148.1"><vh>@clean ./utf8.c</vh>
<v t="caminhante.20200309141148.2"><vh>static within</vh></v>
<v t="caminhante.20200309141148.3"><vh>static ascii_char</vh></v>
<v t="caminhante.20200309141148.4"><vh>static utf8_2bytes_char</vh></v>
<v t="caminhante.20200309141148.5"><vh>static utf8_3bytes_char</vh></v>
<v t="caminhante.20200309141148.6"><vh>static utf8_4bytes_char</vh></v>
<v t="caminhante.20200309141148.7"><vh>uchar_valid</vh></v>
<v t="caminhante.20200309141148.8"><vh>uchar_bytes</vh></v>
<v t="caminhante.20200309141148.9"><vh>ustring_length</vh></v>
<v t="caminhante.20200309141148.10"><vh>ustring_bytes</vh></v>
<v t="caminhante.20200309141148.11"><vh>cstring_bytes</vh></v>
<v t="caminhante.20200309141148.12"><vh>next_uchar</vh></v>
<v t="caminhante.20200309141148.13"><vh>c_to_ustring</vh></v>
<v t="caminhante.20200309141148.14"><vh>u_to_cstring</vh></v>
<v t="caminhante.20200309141148.15"><vh>uchar_puts</vh></v>
<v t="caminhante.20200309141148.16"><vh>ustring_puts</vh></v>
</v>
<v t="caminhante.20200309141158.1"><vh>@clean ./utf8.h</vh>
<v t="caminhante.20200309141158.2"><vh>uchar_valid</vh></v>
<v t="caminhante.20200309141158.3"><vh>uchar_bytes</vh></v>
<v t="caminhante.20200309141158.4"><vh>ustring_length</vh></v>
<v t="caminhante.20200309141158.5"><vh>ustring_bytes</vh></v>
<v t="caminhante.20200309141158.6"><vh>cstring_bytes</vh></v>
<v t="caminhante.20200309141158.7"><vh>next_uchar</vh></v>
<v t="caminhante.20200309141158.8"><vh>c_to_ustring</vh></v>
<v t="caminhante.20200309141158.9"><vh>u_to_cstring</vh></v>
<v t="caminhante.20200309141158.10"><vh>uchar_puts</vh></v>
<v t="caminhante.20200309141158.11"><vh>ustring_puts</vh></v>
</v>
<v t="caminhante.20200309145700.1"><vh>@clean Makefile</vh></v>
</v>
</vnodes>
<tnodes>
<t tx="caminhante.20200309141027.3"></t>
<t tx="caminhante.20200309141113.1"></t>
<t tx="caminhante.20200309141113.2">@language python</t>
<t tx="caminhante.20200309141113.3">import subprocess,os

# Node action: syntax/warning-check the C file named in the clicked headline.
# `c`, `g` and `pClicked` are not defined here; presumably they are injected by
# Leo's nodeActions plugin -- verify against the plugin documentation.
if c.isChanged(): c.save()
os.chdir(c.getNodePath(pClicked))
# Everything after the first headline word (the @file/@clean directive) is the file name.
filename = ' '.join(pClicked.h.split()[1:])
g.es('gcc -c ' + filename)

# The object file goes to /dev/null: only the diagnostics are of interest.
proc = subprocess.Popen(['gcc','-std=gnu99','-Wall','-Werror','-Wfatal-errors','-D_GNU_SOURCE',filename,'-c','-o','/dev/null'],
    stderr=subprocess.PIPE, stdout=subprocess.PIPE, close_fds=True,
    universal_newlines=True)

# communicate() drains both pipes together and waits for the child. Reading
# stdout to EOF before touching stderr can block forever once the child fills
# the stderr pipe, and never reaps the process.
out, err = proc.communicate()
for text in (out, err):
    if text: g.es(text)
</t>
<t tx="caminhante.20200309141113.4">@language python
import subprocess

def getpath (p):
    """Return the normalized directory associated with node `p`, or "" if none is known.

    The starting point is the "path" entry of the directives in effect at `p`.
    For any @<file> node the directory part of the joined file name is used instead.
    """
    directives = c.scanAllDirectives(p)
    folder = directives.get("path")
    if p.isAnyAtFileNode():
        joined = g.os_path_join(folder, p.anyAtFileNodeName())
        if joined:
            folder = g.os_path_dirname(joined)
    return "" if folder is None else g.os_path_normpath(folder)

def execute (cmd):
    """Run `cmd` through the shell and return its combined stdout/stderr as text.

    A non-zero exit status does not raise: the captured output is returned
    followed by a line naming the status, so the output of a failing command
    still reaches the log instead of being lost inside an exception.
    """
    try:
        return subprocess.check_output(cmd,shell=True,universal_newlines=True,stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as err:
        return (err.output or '') + '[exit status %d]\n' % err.returncode

import shlex

# Run the body of the selected node as a shell script inside the node's directory.
path = getpath(c.p)
command = c.p.b
cmdname = c.p.h
g.es('---- '+cmdname+' ----')
# Quote the directory so spaces, quotes or `$` in it cannot change the command,
# and stop if the directory cannot be entered: the node body must never run
# somewhere else. No `cd` is issued when no directory was resolved.
prefix = ''
if path:
    prefix = 'cd '+shlex.quote(path)+' || exit 1\n'
g.es(execute(prefix+command))
g.es('---- end ----')</t>
<t tx="caminhante.20200309141148.1">#include "utf8.h"

@others
</t>
<t tx="caminhante.20200309141148.10">// Count the bytes covered by the run of valid UTF8 characters at the start of `source`.
// The scan ends at the first position where uchar_bytes() reports 0.
size_t ustring_bytes (char* source) {
  size_t total = 0;
  for (;;) {
    size_t step = uchar_bytes(source + total);
    if (step == 0) return total;
    total += step;
  }
}
</t>
<t tx="caminhante.20200309141148.11">// Add up the `.bytes` fields of every element that precedes the terminator (`.bytes == 0`).
// The terminating `\0` of a C string is not part of the sum.
size_t cstring_bytes (struct uchar* source) {
  size_t total = 0;
  for (struct uchar *cur = source; cur-&gt;bytes != 0; cur++)
    total += cur-&gt;bytes;
  return total;
}
</t>
<t tx="caminhante.20200309141148.12">// Decode the character at `source` into a `struct uchar`.
// When uchar_bytes() reports 0 the result is all zero, i.e. a string terminator.
struct uchar next_uchar (char* source) {
  struct uchar uc = (struct uchar){0};
  size_t width = uchar_bytes(source);
  // Copy only the bytes that belong to the character; the rest of `chars` stays 0.
  for (size_t i = 0; i &lt; width; i++)
    uc.chars[i] = source[i];
  uc.bytes = width;
  return uc;
}
</t>
<t tx="caminhante.20200309141148.13">// Convert the valid UTF8 prefix of `source` into `struct uchar` elements at `destination`.
// The terminator element (`.bytes == 0`) is stored as well, so `destination`
// receives one element more than the number of characters converted.
void c_to_ustring (char* source, struct uchar* destination) {
  for (;;) {
    struct uchar current = next_uchar(source);
    *destination++ = current;
    if (current.bytes == 0) break;
    source += current.bytes;
  }
}
</t>
<t tx="caminhante.20200309141148.14">// Concatenate the encoded bytes of every element of `source` into `destination`
// and finish with a `\0` byte.
void u_to_cstring (struct uchar* source, char* destination) {
  char *out = destination;
  for (struct uchar *cur = source; cur-&gt;bytes != 0; cur++) {
    memcpy(out, cur-&gt;chars, cur-&gt;bytes);
    out += cur-&gt;bytes;
  }
  *out = '\0';
}
</t>
<t tx="caminhante.20200309141148.15">// Write the `.bytes` encoded bytes of `*uc` to the file descriptor `fileno`.
// Returns the number of bytes actually written, or 0 when write() fails.
size_t uchar_puts (int fileno, struct uchar *uc) {
  // write() returns ssize_t; its -1 error value must not be converted into a
  // huge size_t that callers would mistake for a byte count.
  ssize_t n = write(fileno, uc-&gt;chars, uc-&gt;bytes);
  return n &lt; 0 ? 0 : (size_t)n;
}
</t>
<t tx="caminhante.20200309141148.16">// Write every character of `ustring`, in order, to the file descriptor `fileno`.
// Stops at the terminator (`.bytes == 0`) or at the first character that could
// not be written completely. Returns the total number of bytes written.
size_t ustring_puts (int fileno, struct uchar *ustring) {
  size_t written = 0;
  for (; ustring-&gt;bytes != 0; ustring++) {
    size_t a = uchar_puts(fileno,ustring);
    // A count larger than the request is an error value, not a byte count:
    // stop without letting it corrupt the total.
    if (a &gt; ustring-&gt;bytes) break;
    written += a;
    // Short write: the bytes that did go out are counted, then stop.
    if (a &lt; ustring-&gt;bytes) break;
  }
  return written;
}
</t>
<t tx="caminhante.20200309141148.2">// True when `value` lies in the closed interval [lower, greater].
static inline bool within (unsigned char value, unsigned char lower, unsigned char greater) {
  if (value &lt; lower) return false;
  return !(value &gt; greater);
}
</t>
<t tx="caminhante.20200309141148.3">// True when the first byte of `source` is a 7-bit character other than `\0`.
static bool ascii_char (char* source) {
  unsigned char lead = (unsigned char)source[0];
  return lead != 0 &amp;&amp; lead &lt; 0x80;
}
</t>
<t tx="caminhante.20200309141148.4">// True when `source` starts with a well-formed 2-byte UTF8 sequence (lead C2..DF).
static bool utf8_2bytes_char (char* source) {
  const unsigned char *s = (const unsigned char*)source;
  // The second byte is only read once the lead byte has matched, so a string
  // that ends here (lead byte is `\0`) is never read past its terminator.
  return within(s[0],0xC2,0xDF) &amp;&amp; within(s[1],0x80,0xBF);
}
</t>
<t tx="caminhante.20200309141148.5">// True when `source` starts with a well-formed 3-byte UTF8 sequence (lead E0..EF).
static bool utf8_3bytes_char (char* source) {
  // Bytes are examined as unsigned values: where plain `char` is signed, a
  // comparison such as `a==0xE0` can never be true.
  const unsigned char *s = (const unsigned char*)source;
  unsigned char lead = s[0], lower, upper;

  // Allowed range of the second byte depends on the lead byte:
  // E0 excludes overlong forms, ED excludes the surrogate range.
  if (lead == 0xE0)                                        { lower = 0xA0; upper = 0xBF; }
  else if (lead == 0xED)                                   { lower = 0x80; upper = 0x9F; }
  else if (within(lead,0xE1,0xEC) || within(lead,0xEE,0xEF)) { lower = 0x80; upper = 0xBF; }
  else return false;

  // Each byte is read only after the previous one matched, so the scan never
  // runs past a `\0` terminator.
  return within(s[1],lower,upper) &amp;&amp; within(s[2],0x80,0xBF);
}
</t>
<t tx="caminhante.20200309141148.6">// True when `source` starts with a well-formed 4-byte UTF8 sequence (lead F0..F4).
static bool utf8_4bytes_char (char* source) {
  // Bytes are examined as unsigned values: where plain `char` is signed, a
  // comparison such as `a==0xF0` can never be true.
  const unsigned char *s = (const unsigned char*)source;
  unsigned char lead = s[0], lower, upper;

  // Allowed range of the second byte depends on the lead byte:
  // F0 excludes overlong forms, F1..F3 accept any continuation byte,
  // F4 is capped so the result does not exceed U+10FFFF.
  if (lead == 0xF0)                { lower = 0x90; upper = 0xBF; }
  else if (within(lead,0xF1,0xF3)) { lower = 0x80; upper = 0xBF; }
  else if (lead == 0xF4)           { lower = 0x80; upper = 0x8F; }
  else return false;

  // Each byte is read only after the previous one matched, so the scan never
  // runs past a `\0` terminator.
  return within(s[1],lower,upper) &amp;&amp; within(s[2],0x80,0xBF) &amp;&amp; within(s[3],0x80,0xBF);
}
</t>
<t tx="caminhante.20200309141148.7">// True when `source` starts with one valid character of any supported length.
// The shorter encodings are tried first.
bool uchar_valid (char* source) {
  if (ascii_char(source)) return true;
  if (utf8_2bytes_char(source)) return true;
  if (utf8_3bytes_char(source)) return true;
  return utf8_4bytes_char(source);
}
</t>
<t tx="caminhante.20200309141148.8">// Length in bytes (1 to 4) of the valid character at the start of `source`,
// or 0 when no valid character starts there (this includes the `\0` terminator).
size_t uchar_bytes (char* source) {
  if (ascii_char(source)) return 1;
  if (utf8_2bytes_char(source)) return 2;
  if (utf8_3bytes_char(source)) return 3;
  if (utf8_4bytes_char(source)) return 4;
  return 0;
}
</t>
<t tx="caminhante.20200309141148.9">// Count the valid UTF8 characters at the start of `source`.
// The scan ends at the first position where uchar_bytes() reports 0.
size_t ustring_length (char* source) {
  size_t count = 0;
  char *cursor = source;
  for (size_t step = uchar_bytes(cursor); step != 0; step = uchar_bytes(cursor)) {
    cursor += step;
    count++;
  }
  return count;
}
</t>
<t tx="caminhante.20200309141158.1">#ifndef _UTF8_H_
#define _UTF8_H_

#include &lt;stdlib.h&gt;
#include &lt;stdio.h&gt;
#include &lt;stdint.h&gt;
#include &lt;string.h&gt;
#include &lt;stdbool.h&gt;
#include &lt;unistd.h&gt;

// A unicode string is an array of `struct uchar` objects, terminated by a `struct uchar` with `.bytes == 0`.
// A `\0` byte isn't considered a valid unicode char.
struct uchar {
  uint8_t bytes;      // number of bytes of `chars` in use (1 to 4); 0 marks the end of a string
  union {
    char chars[4];    // the UTF8 encoded bytes of the character, first byte at index 0
    uint32_t ichars;  // the same four bytes viewed as a single 32-bit integer
  };
};

@others

#endif
</t>
<t tx="caminhante.20200309141158.10">// [ a `struct uchar` object =&gt;
  // side effect: the UTF8 bytes of `*uc` are written to the file descriptor `fileno`;
  // returns the number of bytes written (fewer than `uc-&gt;bytes` means the write was incomplete) ]
size_t uchar_puts (int fileno, struct uchar* uc);
</t>
<t tx="caminhante.20200309141158.11">// [ sequence of `struct uchar` objects, terminated by one with `.bytes == 0` =&gt;
  // side effect: the UTF8 bytes of each object are written, in order, to the file descriptor `fileno`;
  // output stops at the first object that cannot be written completely;
  // returns the number of bytes written ]
size_t ustring_puts (int fileno, struct uchar* ustring);
</t>
<t tx="caminhante.20200309141158.2">// [ `\0` terminated byte sequence =&gt;
  // true when it starts with one valid UTF8 encoded character (1 to 4 bytes) | false ]
// A leading `\0` byte yields false.
bool uchar_valid (char* source);
</t>
<t tx="caminhante.20200309141158.3">// [ `\0` terminated byte sequence =&gt;
  // number of bytes occupied by the valid UTF8 character at its start, between 1 and 4 |
  // 0 when no valid character starts there (including a leading `\0`) ]
size_t uchar_bytes (char* source);
</t>
<t tx="caminhante.20200309141158.4">// [ `\0` terminated byte sequence =&gt;
  // number of consecutive valid UTF8 characters found at its start;
  // counting stops at the first `\0` or invalid byte | 0 when there is none ]
size_t ustring_length (char* source);
</t>
<t tx="caminhante.20200309141158.5">// [ `\0` terminated byte sequence =&gt;
  // number of bytes occupied by the consecutive valid UTF8 characters at its start;
  // counting stops at the first `\0` or invalid byte, which is not included | 0 when there is none ]
size_t ustring_bytes (char* source);
</t>
<t tx="caminhante.20200309141158.6">// [ sequence of `struct uchar` objects, terminated by one with `.bytes == 0` =&gt;
  // sum of the `.bytes` members of all objects before the terminator ]
// The terminating `\0` of a conventional `char` array is NOT included:
  // a buffer for `u_to_cstring` needs `cstring_bytes(source) + 1` bytes.
size_t cstring_bytes (struct uchar* source);
</t>
<t tx="caminhante.20200309141158.7">// [ `\0` terminated byte sequence =&gt;
  // a `struct uchar` object holding the valid UTF8 character at its start |
  // a `struct uchar` object with `.bytes == 0` when no valid character starts there ]
// `source` is passed by value and is not advanced: to step through a string the
  // caller adds the returned `.bytes` to its own pointer.
struct uchar next_uchar (char* source);
</t>
<t tx="caminhante.20200309141158.8">// [ a `char` array containing potentially valid UTF8 text =&gt;
  // a `struct uchar` array with all consecutive valid UTF8 characters is written at `*destination`,
  // followed by a terminator object with `.bytes == 0` ]
// You need to calculate the needed `struct uchar` array length beforehand:
  // `ustring_length(source) + 1` elements (the extra one holds the terminator).
void c_to_ustring (char* source, struct uchar* destination);
</t>
<t tx="caminhante.20200309141158.9">// [ a `struct uchar` array, terminated by an object with `.bytes == 0` =&gt;
  // a `\0` terminated `char` array is written at `*destination` ]
// You need to calculate the needed `char` array length beforehand: the sum of all
  // `struct uchar` `.bytes` members plus 1 (accounting for the extra `\0` byte at the end).
void u_to_cstring (struct uchar* source, char* destination);
</t>
<t tx="caminhante.20200309141700.1">fossil status</t>
<t tx="caminhante.20200309141709.1"># Stop if the git checkout is missing, so the export and push never run in another directory.
cd git || exit 1
fossil2git.sh ../minimal_UTF8.fossil
git push --set-upstream origin trunk
echo</t>
<t tx="caminhante.20200309142127.1"># -p: an already existing directory is not an error.
mkdir -p ./git
# Stop if the directory cannot be entered, so `git init` never runs in the project root.
cd ./git || exit 1
git init
git remote add origin git@notabug.org:XCaminhante/minimal_UTF8.git</t>
<t tx="caminhante.20200309142214.1"></t>
<t tx="caminhante.20200309145700.1">@tabwidth 5
# Compiler used by every rule; override on the command line (e.g. `make CC=clang`).
CC := gcc
# -fPIC: the same object file is linked into the shared library, which needs
# position-independent code.
CFLAGS := -std=gnu99 -Wall -Werror -Wfatal-errors -D_GNU_SOURCE -O3 -fPIC

# These targets do not name files; keep them working even if such files appear.
.PHONY: all clean

all: build/libminiutf8.so build/libminiutf8.a

build/utf8.o: utf8.c utf8.h
	@mkdir -p build
	$(CC) $(CFLAGS) $&lt; -c -o $@


build/libminiutf8.a: build/utf8.o
	ar cr $@ $&lt;
	ranlib $@

build/libminiutf8.so: build/utf8.o
	$(CC) -shared $&lt; -o $@

clean:
	rm -rfv build</t>
</tnodes>
</leo_file>
